// firstStreetID.do
// Inputs: prop_geo_EC, 1kmEC_td, EC, addressEC, fsid_`state'
// Outputs: ECandFSfull, prop_geo_EC_merge, ECandFS1-10, fsid, propIDfsid
// Date last updated: 1/27/2025

// This file matches First Street properties (identified with FSID) with our dataset of CoreLogic properties (identified by prop_id_dw). This merging is done based on both property coordinates (which are done in several iterations, as they can be inexact) and on street address. 
// The raw First Street data was obtained using an API which is no longer in use; First Street data can now be accessed using RADAR.

********************************************************************************	
* Load First Street (FS) coordinates and merge with East Coast (EC) CoreLogic data
********************************************************************************
// Use FS data by coordinates, fix rounding errors, and save intermediate file for merging
	use "$pathi\prop_geo_EC"
	rename prop_long x1
	rename prop_lat y1
	tostring x1, replace force
	tostring y1, replace force
	replace x1=substr(x1,1,10)
	replace y1=substr(y1,1,9)
	gen x6th = substr(x1, 10,.)
	gen y6th = substr(y1, 9,.)
	destring y6th, replace
	destring x6th, replace
	gen y2=substr(y1,1,8)
	gen x2=substr(x1,1,9)
	destring y2, replace
	destring x2, replace
	replace y2 = y2 + .00001 if y6th >= 5
	replace x2 = x2 - .00001 if x6th >= 5
	drop x1 y1 x6th y6th
	duplicates drop x2 y2, force
	save "$pathi\prop_geo_EC_merge", replace

// Import CoreLogic data and clean latitude/longitude
	use "$pathi\1kmEC_td.dta", clear
	gen x1 = _X
	gen y1 = _Y
	tostring x1, replace force
	tostring y1, replace force
	replace x1=substr(x1,1,10)
	replace y1=substr(y1,1,9)
	gen x6th = substr(x1, 10,.)
	gen y6th = substr(y1, 9,.)
	destring y6th, replace
	destring x6th, replace
	gen y2=substr(y1,1,8)
	gen x2=substr(x1,1,9)
	destring y2, replace
	destring x2, replace
	replace y2 = y2 + .00001 if y6th >= 5
	replace x2 = x2 - .00001 if x6th >= 5
	sort x2 y2
	
// Save different versions of coordinate-level merge between FS and CoreLogic for rounding
	* We will change x and y slightly (by + .00001 or - .00001 and alternate between different combinations. This will allow us to merge many extra FSIDs due to tiny rounding errors)
preserve
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS1.dta", replace
	restore
	preserve
	replace y2 = y2 + .00001
	sort x2 y2
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS2.dta", replace
restore
preserve
	replace y2 = y2 - .00001
	sort x2 y2
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS3.dta", replace
restore
preserve
	replace y2 = y2 - .00002
	sort x2 y2
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS4.dta", replace
restore
preserve
	replace x2 = x2 + .00001
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS5.dta", replace
restore
preserve
	sort x2 y2
	replace x2 = x2 - .00001
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS6.dta", replace
restore
preserve
	replace x2 = x2 + .00001
	replace y2 = y2 + .00001
	sort x2 y2
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS7.dta", replace
restore
preserve
	replace x2 = x2 - .00001
	replace y2 = y2 - .00001
	sort x2 y2
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS8.dta", replace
restore
preserve
	replace x2 = x2 + .00001
	replace y2 = y2 - .00001
	sort x2 y2
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS9.dta", replace
restore
	replace x2 = x2 - .00001
	replace y2 = y2 + .00001
	sort x2 y2
	merge m:1 x2 y2 using "$pathi\prop_geo_EC_merge", force keep(match)
	keep if _merge == 3
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFS10.dta", replace

// Now we have merged these into our Corelogic dataset and need to figure out which properties do NOT have an associated FSID
	
	use "$pathi\EC", clear
	merge m:1 prop_id_dw using "$pathi\ECandFS1", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS2", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS3", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS4", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS5", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS6", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS7", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS8", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS9", force keep(master) nogenerate
	merge m:1 prop_id_dw using "$pathi\ECandFS10", force keep(master) nogenerate
	duplicates drop prop_id_dw, force
	sort prop_id_dw 
	drop _merge
	
// Merge with address and run that through the FirstStreet API code	
	merge m:1 prop_id_dw using "$pathi\addressEC", force keep(match)
	keep if _merge == 3
	destring st_num, replace
	tostring st_num, replace
	gen address = st_num + " " + st_direction + " " + st_name + " " + st_sfx + " " + quadrant + ", " + city + ", " + state
	duplicates drop address, force
	keep prop_id_dw address
	sort address

// Append together FSID datasets from each state
preserve 
	* From FSID, we have the following datasets - must append them together
	use "$pathi\fsid_ct.csv", clear
	append using "$pathi\fsid_de.csv"
	append using "$pathi\fsid_fl1.csv"
	append using "$pathi\fsid_fl2.csv"
	append using "$pathi\fsid_fl3.csv"
	append using "$pathi\fsid_fl4.csv"
	append using "$pathi\fsid_fl5.csv"
	append using "$pathi\fsid_ga1.csv"
	append using "$pathi\fsid_ga2.csv"
	append using "$pathi\fsid_ga3.csv"
	append using "$pathi\fsid_ma2.csv"
	append using "$pathi\fsid_ma1.csv"
	append using "$pathi\fsid_me.csv"
	append using "$pathi\fsid_nc.csv"
	append using "$pathi\fsid_nh.csv"
	append using "$pathi\fsid_ny.csv"
	append using "$pathi\fsid_pa.csv"
	append using "$pathi\fsid_ri.csv"
	append using "$pathi\fsid_sc.csv"
	append using "$pathi\fsid_va1.csv"
	append using "$pathi\fsid_va2.csv"
	append using "$pathi\fsid_va3.csv"
	
	drop if valid_id == "FALSE"
	sort address
	duplicates drop address, force
	save "$pathi\fsid", replace
restore
	
// Now attach to our prop_id_address dataset
	merge 1:1 address using "$pathi\fsid"
	drop if _merge == 1
	keep prop_id_dw fsid
	save "$pathi\propIDfsid", replace

// Append together
	use "$pathi\ECandFS1.dta", clear
	append using "$pathi\ECandFS2.dta"
	append using "$pathi\ECandFS3.dta"
	append using "$pathi\ECandFS4.dta"
	append using "$pathi\ECandFS5.dta"
	append using "$pathi\ECandFS6.dta"
	append using "$pathi\ECandFS7.dta"
	append using "$pathi\ECandFS8.dta"
	append using "$pathi\ECandFS9.dta"
	append using "$pathi\ECandFS10.dta"
	tostring(fsid), replace
	append using "$pathi\propIDfsid"
	
// Clean and save combined dataset of matched CoreLogic property IDs and First Street IDs
	sort prop_id_dw
	drop _merge
	keep prop_id_dw fsid
	duplicates drop prop_id_dw, force
	save "$pathi\ECandFSfull.dta", replace